"""
爬虫,多线程
"""
import requests
import threading
import os
import time
import sys


class MulThreadDownload(threading.Thread):
    """Worker thread that downloads one byte range of a remote file.

    Each instance issues an HTTP Range request for bytes
    [startpos, endpos] (inclusive) of *url* and writes the payload at the
    matching offset of its file object.
    """

    def __init__(self, url, startpos, endpos, fd):
        """
        url: download URL
        startpos: first byte of the HTTP Range (inclusive)
        endpos: last byte of the HTTP Range (inclusive)
        fd: file object this thread writes its slice into (owned by the
            thread from here on; it is closed when the range is done)
        """
        super(MulThreadDownload, self).__init__()
        self.url = url
        self.startpos = startpos
        self.endpos = endpos
        self.fd = fd

    def downloadRange(self):
        """Fetch the assigned byte range and write it at its file offset."""
        print("start thread:%s at %s" % (self.name, time.time()))

        # Browser-like User-Agent; the Range header selects only this
        # thread's slice of the file.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/' +
            '537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36 Edg/87.0.664.66',
            "Range": "bytes=%s-%s" % (self.startpos, self.endpos)
        }
        try:
            # timeout prevents an unresponsive server from hanging the
            # thread (and therefore the whole download) forever.
            res = requests.get(self.url, headers=headers, timeout=30)
            self.fd.seek(self.startpos)
            self.fd.write(res.content)
            self.fd.flush()  # without this the file can end up incomplete
        finally:
            # Close the per-thread handle so the descriptor is not leaked
            # even when the request fails.
            self.fd.close()
        print("stop thread:%s at %s" % (self.name, time.time()))

    def run(self):
        '''
        Thread entry point: delegate to downloadRange().
        '''
        self.downloadRange()


def getSize(url, timeout=10):
    """Return the size in bytes of the resource at *url*.

    Uses a HEAD request so no body is transferred.

    url: resource URL
    timeout: seconds to wait for the server (keyword with a default, so
        existing callers are unaffected)
    """
    # Without a timeout a dead server would block the caller indefinitely.
    res = requests.head(url, timeout=timeout)
    size = res.headers['Content-Length']
    return int(size)


def download(url, saveDir, threadNum):
    """Download *url* into *saveDir* using *threadNum* range-request threads.

    url: request URL
    saveDir: directory the file is saved into
    threadNum: number of worker threads
    """
    fileName = url.split('/')[-1]
    fileSize = getSize(url)
    file = os.path.join(saveDir, fileName)

    # Truncate (or create) the target file.
    tempf = open(file, 'w')
    tempf.close()

    step = fileSize // threadNum  # bytes per chunk
    mtd_list = []  # worker threads
    fd_list = []   # per-thread file objects, closed after the joins
    start = 0      # range start offset
    end = -1       # range end offset (inclusive)

    # Hand out inclusive byte ranges [start, end] until bytes
    # 0 .. fileSize-1 are all covered.  E.g. an 11-byte file is done once
    # end reaches 10.
    while end < fileSize - 1:
        start = end + 1
        end = start + step - 1
        if end >= fileSize:
            # Ranges are inclusive, so the last valid offset is
            # fileSize-1 (the original clamped to fileSize, requesting
            # one byte past the end).
            end = fileSize - 1

        # Give every thread its own independently opened file object.
        # Separate open() calls mean separate file offsets, so the
        # seek()+write() pairs of different threads cannot race (os.dup'd
        # descriptors would share a single offset).
        fd = open(file, 'rb+')
        fd_list.append(fd)

        mtdl = MulThreadDownload(url, start, end, fd)
        mtdl.start()
        mtd_list.append(mtdl)

    for t in mtd_list:
        t.join()

    # Closing flushes any buffered data; close() is a no-op on handles a
    # thread has already closed, so this is safe either way.
    for fd in fd_list:
        fd.close()


def createLogTxt(logDir):
    """Create a timestamped log file and return its path.

    logDir: directory for log files (created if it does not exist)
    return: full path (directory + file name) of the new log file
    """
    # Create the directory up front so the open() below cannot fail on a
    # missing path.
    os.makedirs(logDir, exist_ok=True)
    logTxtName = time.strftime("%Y-%m-%d %H_%M_%S", time.localtime())
    logFile = os.path.join(logDir, logTxtName + '.txt')
    # Explicit UTF-8 so the non-ASCII marker below cannot raise
    # UnicodeEncodeError on platforms whose default encoding is not UTF-8.
    with open(logFile, "a", encoding="utf-8") as f:
        f.write('log初始化\n')
    return logFile


def log(logFile, logInfo):
    """Append one timestamped entry to the log file.

    logFile: path of the log file
    logInfo: message text (caller supplies any trailing newline)
    """
    timeStr = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    # Explicit UTF-8 so non-ASCII messages cannot crash the logger on
    # platforms with a different default encoding.
    with open(logFile, "a", encoding="utf-8") as f:
        f.write(timeStr + '-----' + logInfo)


if __name__ == "__main__":
    url = 'https://w.wallhaven.cc/full/6o/wallhaven-6oqzgq.jpg'
    saveDir = './DL/'

    logDir = './Log/'
    # Make sure both target directories exist before any file is opened.
    os.makedirs(logDir, exist_ok=True)
    os.makedirs(saveDir, exist_ok=True)
    logFile = createLogTxt(logDir)

    try:
        # HEAD instead of GET: the original GET pulled the entire file body
        # just to read the status code, then download() fetched it again.
        # The request now also sits inside the try so connection errors are
        # logged rather than crashing unlogged.
        resCode = requests.head(url, timeout=10).status_code
        if resCode == 200:
            print('开始下载')
            download(url, saveDir, 4)
            info = '[%d],%s,下载成功\n' % (200, '文件名')
            print(info)
            log(logFile, info)
        elif resCode == 404:
            info = '[404],%s,does not exist\n' % ('文件名')
            print(info)
            log(logFile, info)
        else:
            info = '其他[%d]\n' % (resCode)
            log(logFile, info)
    except Exception as e:
        # Network errors, disk errors, etc. all end up in the log file.
        info = '异常:{0}\n'.format(e)
        print(info)
        log(logFile, info)
